######################################################
# Script: Naturalisatie in Nederland: wie en waarom? #
# Floris Peters, Hans Schmeets & Maarten Vink        #
# Bevolkingstrends                                   #
######################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro level information. As such, for privacy reasons the data is only available to individuals employed at or affiliated to Statistics Netherlands. 
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\BT_naturalisation 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)

# START
# (Time vector - start of a given time period)

# STOP
# (Time vector - end of a given time period)

# EVENT
# (Acquiring Dutch citizenship)
# [0] The event does not occur during this time period; 
# [1] The event occurs at the end of this time period

# GENDER
# [0] Female; 
# [1] Male

# AGEARRIVAL
# (Age at the moment of migration in years)

# AGECAT 
# (Age at the moment of migration in categories) 
# [1] 15-17 years; 
# [2] 18-24 years; 
# [3] 25-34 years; 
# [4] 35-44 years; 
# [5] 45-54 years; 
# [6] 55-64 years; 
# [7] 65-74 years; 
# [8]: >74 years

# PARTNER
# [1] No partner; 
# [2] Native Dutch partner 
# [3] Foreign born foreign partner; 
# [4] Year naturalization partner; 
# [5] 1 year after naturalization partner; 
# [6] 2 years after naturalization partner; 
# [7] 3 years after naturalization partner; 
# [8] >3 years after naturalization partner 

# CHILD 
# (Age of the youngest child in the household in years)
# [999] No children in the household

# CHILDREC
# (Children in the household in categories)
# [1] Children <18 in household; 
# [2] no children <18 in household

# DEVELOPMENT
# (Human Development Index (HDI) score origin country)
  
# DEVCAT
# (Human Development index (HDI) score origin country in categories)
# [1] First quartile; 
# [2] Second quartile; 
# [3] Third quartile; 
# [4] Fourth quartile

# KAUFMANN
# (Kaufmann index [political stability] score origin country)

# KAUFMANNCAT
# (Kaufmann index [political stability] score origin country in categories)
# [1] First quartile; 
# [2] Second quartile; 
# [3] Third quartile; 
# [4] Fourth quartile

# DUALCITREC
# (Dual citizenship toleration in the origin country in categories)
# [0] No automatic loss dual nationality; 
# [1] automatic loss dual nationality)

#######################################################################################################################
#######################################################################################################################

#upload the dataset (Data_cit_acq_main.sav) to R and load the necessary libraries
library(survival)
library(splines)
library(foreign)
dataset_main <- read.csv(file.choose(),header=T,sep=";")


#############     
# Table 3.1 #
#############  

#select last observation in survival structure (i.e. last observation or event)
dataset_main_last_obs <- dataset_main %>% 
  group_by(ID) %>%
  summarise(lastocc = last(YEAR))

#proportion naturalised (last observation per individual) by subgroups
prop.table(dataset_main_last_obs$GENDER,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$AGECAT,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$PARTNER,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$CHILDREC,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$DUALCITREC,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$DEVCAT,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$KAUFMANNCAT,dataset_main_last_obs$EVENT)
prop.table(dataset_main_last_obs$EVENT)


#############     
# Table 3.2 #
#############

#compute survival function
dataset_main$surv_main <- Surv(dataset_main$START, dataset_main$STOP, dataset_main$EVENT)

#cox regression
table3.2 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + DEVELOPMENT + KAUFMANN, data = dataset_main)
